{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "94841ed5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "b30d1ce7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 相对路径：相对于你当前文件的路径\n",
    "data = pd.read_csv(\"data/movie_metadata.csv\")\n",
    "# 绝对路径：完整的路径\n",
    "data1 = pd.read_csv(\"C:\\\\Users\\\\zzk10\\\\PycharmProjects\\\\python-demo1\\\\notebook\\\\data\\\\movie_metadata.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "4fa36338",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   color      director_name  num_critic_for_reviews  duration  \\\n",
       "0  Color      James Cameron                   723.0     178.0   \n",
       "1  Color     Gore Verbinski                   302.0     169.0   \n",
       "2  Color         Sam Mendes                   602.0     148.0   \n",
       "3  Color  Christopher Nolan                   813.0     164.0   \n",
       "4    NaN        Doug Walker                     NaN       NaN   \n",
       "\n",
       "   director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                      0.0                   855.0  Joel David Moore   \n",
       "1                    563.0                  1000.0     Orlando Bloom   \n",
       "2                      0.0                   161.0      Rory Kinnear   \n",
       "3                  22000.0                 23000.0    Christian Bale   \n",
       "4                    131.0                     NaN        Rob Walker   \n",
       "\n",
       "   actor_1_facebook_likes        gross                           genres  ...  \\\n",
       "0                  1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi  ...   \n",
       "1                 40000.0  309404152.0         Action|Adventure|Fantasy  ...   \n",
       "2                 11000.0  200074175.0        Action|Adventure|Thriller  ...   \n",
       "3                 27000.0  448130642.0                  Action|Thriller  ...   \n",
       "4                   131.0          NaN                      Documentary  ...   \n",
       "\n",
       "  num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0               3054.0  English      USA           PG-13  237000000.0   \n",
       "1               1238.0  English      USA           PG-13  300000000.0   \n",
       "2                994.0  English       UK           PG-13  245000000.0   \n",
       "3               2701.0  English      USA           PG-13  250000000.0   \n",
       "4                  NaN      NaN      NaN             NaN          NaN   \n",
       "\n",
       "   title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0      2009.0                  936.0        7.9          1.78   \n",
       "1      2007.0                 5000.0        7.1          2.35   \n",
       "2      2015.0                  393.0        6.8          2.35   \n",
       "3      2012.0                23000.0        8.5          2.35   \n",
       "4         NaN                   12.0        7.1           NaN   \n",
       "\n",
       "  movie_facebook_likes  \n",
       "0                33000  \n",
       "1                    0  \n",
       "2                85000  \n",
       "3               164000  \n",
       "4                    0  \n",
       "\n",
       "[5 rows x 28 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "d0f8700b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "a14e2cc6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>director_name</th>\n",
       "      <th>actor_2_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>James Cameron</td>\n",
       "      <td>Joel David Moore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>Christian Bale</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>Rob Walker</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>Daphne Zuniga</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Valorie Curry</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>Maxwell Moody</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>Daniel Henney</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5043 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          director_name      actor_2_name\n",
       "0         James Cameron  Joel David Moore\n",
       "1        Gore Verbinski     Orlando Bloom\n",
       "2            Sam Mendes      Rory Kinnear\n",
       "3     Christopher Nolan    Christian Bale\n",
       "4           Doug Walker        Rob Walker\n",
       "...                 ...               ...\n",
       "5038        Scott Smith     Daphne Zuniga\n",
       "5039                NaN     Valorie Curry\n",
       "5040   Benjamin Roberds     Maxwell Moody\n",
       "5041        Daniel Hsia     Daniel Henney\n",
       "5042           Jon Gunn  Brian Herzlinger\n",
       "\n",
       "[5043 rows x 2 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[['director_name','actor_2_name']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "d0ecd0c2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count                 4939\n",
       "unique                2398\n",
       "top       Steven Spielberg\n",
       "freq                    26\n",
       "Name: director_name, dtype: object"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.director_name.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "6008f71b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Color</td>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>1.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>318.0</td>\n",
       "      <td>Daphne Zuniga</td>\n",
       "      <td>637.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>English</td>\n",
       "      <td>Canada</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>470.0</td>\n",
       "      <td>7.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>Color</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>319.0</td>\n",
       "      <td>Valorie Curry</td>\n",
       "      <td>841.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Crime|Drama|Mystery|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>359.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>TV-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>593.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>16.00</td>\n",
       "      <td>32000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Color</td>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>13.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Maxwell Moody</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Drama|Horror|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Color</td>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>14.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>489.0</td>\n",
       "      <td>Daniel Henney</td>\n",
       "      <td>946.0</td>\n",
       "      <td>10443.0</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>2.35</td>\n",
       "      <td>660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4704 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews  duration  \\\n",
       "0     Color      James Cameron                   723.0     178.0   \n",
       "1     Color     Gore Verbinski                   302.0     169.0   \n",
       "2     Color         Sam Mendes                   602.0     148.0   \n",
       "3     Color  Christopher Nolan                   813.0     164.0   \n",
       "5     Color     Andrew Stanton                   462.0     132.0   \n",
       "...     ...                ...                     ...       ...   \n",
       "5038  Color        Scott Smith                     1.0      87.0   \n",
       "5039  Color                NaN                    43.0      43.0   \n",
       "5040  Color   Benjamin Roberds                    13.0      76.0   \n",
       "5041  Color        Daniel Hsia                    14.0     100.0   \n",
       "5042  Color           Jon Gunn                    43.0      90.0   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                         0.0                   855.0  Joel David Moore   \n",
       "1                       563.0                  1000.0     Orlando Bloom   \n",
       "2                         0.0                   161.0      Rory Kinnear   \n",
       "3                     22000.0                 23000.0    Christian Bale   \n",
       "5                       475.0                   530.0   Samantha Morton   \n",
       "...                       ...                     ...               ...   \n",
       "5038                      2.0                   318.0     Daphne Zuniga   \n",
       "5039                      NaN                   319.0     Valorie Curry   \n",
       "5040                      0.0                     0.0     Maxwell Moody   \n",
       "5041                      0.0                   489.0     Daniel Henney   \n",
       "5042                     16.0                    16.0  Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross                           genres  \\\n",
       "0                     1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi   \n",
       "1                    40000.0  309404152.0         Action|Adventure|Fantasy   \n",
       "2                    11000.0  200074175.0        Action|Adventure|Thriller   \n",
       "3                    27000.0  448130642.0                  Action|Thriller   \n",
       "5                      640.0   73058679.0          Action|Adventure|Sci-Fi   \n",
       "...                      ...          ...                              ...   \n",
       "5038                   637.0          NaN                     Comedy|Drama   \n",
       "5039                   841.0          NaN     Crime|Drama|Mystery|Thriller   \n",
       "5040                     0.0          NaN            Drama|Horror|Thriller   \n",
       "5041                   946.0      10443.0             Comedy|Drama|Romance   \n",
       "5042                    86.0      85222.0                      Documentary   \n",
       "\n",
       "      ... num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0     ...               3054.0  English      USA           PG-13  237000000.0   \n",
       "1     ...               1238.0  English      USA           PG-13  300000000.0   \n",
       "2     ...                994.0  English       UK           PG-13  245000000.0   \n",
       "3     ...               2701.0  English      USA           PG-13  250000000.0   \n",
       "5     ...                738.0  English      USA           PG-13  263700000.0   \n",
       "...   ...                  ...      ...      ...             ...          ...   \n",
       "5038  ...                  6.0  English   Canada             NaN          NaN   \n",
       "5039  ...                359.0  English      USA           TV-14          NaN   \n",
       "5040  ...                  3.0  English      USA             NaN       1400.0   \n",
       "5041  ...                  9.0  English      USA           PG-13          NaN   \n",
       "5042  ...                 84.0  English      USA              PG       1100.0   \n",
       "\n",
       "      title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0         2009.0                  936.0        7.9          1.78   \n",
       "1         2007.0                 5000.0        7.1          2.35   \n",
       "2         2015.0                  393.0        6.8          2.35   \n",
       "3         2012.0                23000.0        8.5          2.35   \n",
       "5         2012.0                  632.0        6.6          2.35   \n",
       "...          ...                    ...        ...           ...   \n",
       "5038      2013.0                  470.0        7.7           NaN   \n",
       "5039         NaN                  593.0        7.5         16.00   \n",
       "5040      2013.0                    0.0        6.3           NaN   \n",
       "5041      2012.0                  719.0        6.3          2.35   \n",
       "5042      2004.0                   23.0        6.6          1.85   \n",
       "\n",
       "     movie_facebook_likes  \n",
       "0                   33000  \n",
       "1                       0  \n",
       "2                   85000  \n",
       "3                  164000  \n",
       "5                   24000  \n",
       "...                   ...  \n",
       "5038                   84  \n",
       "5039                32000  \n",
       "5040                   16  \n",
       "5041                  660  \n",
       "5042                  456  \n",
       "\n",
       "[4704 rows x 28 columns]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['language'] == 'English']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "afc46ee0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>director_name</th>\n",
       "      <th>actor_2_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>James Cameron</td>\n",
       "      <td>Joel David Moore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>Christian Bale</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>Rob Walker</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       director_name      actor_2_name\n",
       "0      James Cameron  Joel David Moore\n",
       "1     Gore Verbinski     Orlando Bloom\n",
       "2         Sam Mendes      Rory Kinnear\n",
       "3  Christopher Nolan    Christian Bale\n",
       "4        Doug Walker        Rob Walker"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[['director_name','actor_2_name']][:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "ee58e070",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0          USA\n",
       "1          USA\n",
       "2           UK\n",
       "3          USA\n",
       "4          NaN\n",
       "         ...  \n",
       "5038    Canada\n",
       "5039       USA\n",
       "5040       USA\n",
       "5041       USA\n",
       "5042       USA\n",
       "Name: country, Length: 5043, dtype: object"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.country"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "d66ce2f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.country = data.country.fillna(\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "7b1d5042",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0          USA\n",
       "1          USA\n",
       "2           UK\n",
       "3          USA\n",
       "4             \n",
       "         ...  \n",
       "5038    Canada\n",
       "5039       USA\n",
       "5040       USA\n",
       "5041       USA\n",
       "5042       USA\n",
       "Name: country, Length: 5043, dtype: object"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.country"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "ab22c5ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.duration = data.duration.fillna(data.duration.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "2ed47678",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       178.000000\n",
       "1       169.000000\n",
       "2       148.000000\n",
       "3       164.000000\n",
       "4       107.201074\n",
       "           ...    \n",
       "5038     87.000000\n",
       "5039     43.000000\n",
       "5040     76.000000\n",
       "5041    100.000000\n",
       "5042     90.000000\n",
       "Name: duration, Length: 5043, dtype: float64"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.duration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "a05b7e31",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5026</th>\n",
       "      <td>Color</td>\n",
       "      <td>Olivier Assayas</td>\n",
       "      <td>81.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>107.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>Béatrice Dalle</td>\n",
       "      <td>576.0</td>\n",
       "      <td>136007.0</td>\n",
       "      <td>Drama|Music|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>39.0</td>\n",
       "      <td>French</td>\n",
       "      <td>France</td>\n",
       "      <td>R</td>\n",
       "      <td>4500.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>133.0</td>\n",
       "      <td>6.9</td>\n",
       "      <td>2.35</td>\n",
       "      <td>171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5027</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jafar Panahi</td>\n",
       "      <td>64.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>397.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Nargess Mamizadeh</td>\n",
       "      <td>5.0</td>\n",
       "      <td>673780.0</td>\n",
       "      <td>Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Persian</td>\n",
       "      <td>Iran</td>\n",
       "      <td>Not Rated</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>2000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>1.85</td>\n",
       "      <td>697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5033</th>\n",
       "      <td>Color</td>\n",
       "      <td>Shane Carruth</td>\n",
       "      <td>143.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>291.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>David Sullivan</td>\n",
       "      <td>291.0</td>\n",
       "      <td>424760.0</td>\n",
       "      <td>Drama|Sci-Fi|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>371.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.85</td>\n",
       "      <td>19000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5035</th>\n",
       "      <td>Color</td>\n",
       "      <td>Robert Rodriguez</td>\n",
       "      <td>56.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Peter Marquardt</td>\n",
       "      <td>121.0</td>\n",
       "      <td>2040920.0</td>\n",
       "      <td>Action|Crime|Drama|Romance|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>130.0</td>\n",
       "      <td>Spanish</td>\n",
       "      <td>USA</td>\n",
       "      <td>R</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>1992.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>6.9</td>\n",
       "      <td>1.37</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3756 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews  duration  \\\n",
       "0     Color      James Cameron                   723.0     178.0   \n",
       "1     Color     Gore Verbinski                   302.0     169.0   \n",
       "2     Color         Sam Mendes                   602.0     148.0   \n",
       "3     Color  Christopher Nolan                   813.0     164.0   \n",
       "5     Color     Andrew Stanton                   462.0     132.0   \n",
       "...     ...                ...                     ...       ...   \n",
       "5026  Color    Olivier Assayas                    81.0     110.0   \n",
       "5027  Color       Jafar Panahi                    64.0      90.0   \n",
       "5033  Color      Shane Carruth                   143.0      77.0   \n",
       "5035  Color   Robert Rodriguez                    56.0      81.0   \n",
       "5042  Color           Jon Gunn                    43.0      90.0   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes       actor_2_name  \\\n",
       "0                         0.0                   855.0   Joel David Moore   \n",
       "1                       563.0                  1000.0      Orlando Bloom   \n",
       "2                         0.0                   161.0       Rory Kinnear   \n",
       "3                     22000.0                 23000.0     Christian Bale   \n",
       "5                       475.0                   530.0    Samantha Morton   \n",
       "...                       ...                     ...                ...   \n",
       "5026                    107.0                    45.0     Béatrice Dalle   \n",
       "5027                    397.0                     0.0  Nargess Mamizadeh   \n",
       "5033                    291.0                     8.0     David Sullivan   \n",
       "5035                      0.0                     6.0    Peter Marquardt   \n",
       "5042                     16.0                    16.0   Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross  \\\n",
       "0                     1000.0  760505847.0   \n",
       "1                    40000.0  309404152.0   \n",
       "2                    11000.0  200074175.0   \n",
       "3                    27000.0  448130642.0   \n",
       "5                      640.0   73058679.0   \n",
       "...                      ...          ...   \n",
       "5026                   576.0     136007.0   \n",
       "5027                     5.0     673780.0   \n",
       "5033                   291.0     424760.0   \n",
       "5035                   121.0    2040920.0   \n",
       "5042                    86.0      85222.0   \n",
       "\n",
       "                                   genres  ... num_user_for_reviews language  \\\n",
       "0         Action|Adventure|Fantasy|Sci-Fi  ...               3054.0  English   \n",
       "1                Action|Adventure|Fantasy  ...               1238.0  English   \n",
       "2               Action|Adventure|Thriller  ...                994.0  English   \n",
       "3                         Action|Thriller  ...               2701.0  English   \n",
       "5                 Action|Adventure|Sci-Fi  ...                738.0  English   \n",
       "...                                   ...  ...                  ...      ...   \n",
       "5026                  Drama|Music|Romance  ...                 39.0   French   \n",
       "5027                                Drama  ...                 26.0  Persian   \n",
       "5033                Drama|Sci-Fi|Thriller  ...                371.0  English   \n",
       "5035  Action|Crime|Drama|Romance|Thriller  ...                130.0  Spanish   \n",
       "5042                          Documentary  ...                 84.0  English   \n",
       "\n",
       "      country  content_rating       budget  title_year actor_2_facebook_likes  \\\n",
       "0         USA           PG-13  237000000.0      2009.0                  936.0   \n",
       "1         USA           PG-13  300000000.0      2007.0                 5000.0   \n",
       "2          UK           PG-13  245000000.0      2015.0                  393.0   \n",
       "3         USA           PG-13  250000000.0      2012.0                23000.0   \n",
       "5         USA           PG-13  263700000.0      2012.0                  632.0   \n",
       "...       ...             ...          ...         ...                    ...   \n",
       "5026   France               R       4500.0      2004.0                  133.0   \n",
       "5027     Iran       Not Rated      10000.0      2000.0                    0.0   \n",
       "5033      USA           PG-13       7000.0      2004.0                   45.0   \n",
       "5035      USA               R       7000.0      1992.0                   20.0   \n",
       "5042      USA              PG       1100.0      2004.0                   23.0   \n",
       "\n",
       "     imdb_score  aspect_ratio movie_facebook_likes  \n",
       "0           7.9          1.78                33000  \n",
       "1           7.1          2.35                    0  \n",
       "2           6.8          2.35                85000  \n",
       "3           8.5          2.35               164000  \n",
       "5           6.6          2.35                24000  \n",
       "...         ...           ...                  ...  \n",
       "5026        6.9          2.35                  171  \n",
       "5027        7.5          1.85                  697  \n",
       "5033        7.0          1.85                19000  \n",
       "5035        6.9          1.37                    0  \n",
       "5042        6.6          1.85                  456  \n",
       "\n",
       "[3756 rows x 28 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 返回一个新的DataFrame 并不会对原来的DataFrame造成影响\n",
    "# 可以将inplace置为True，表示对原来的DataFrame进行修改\n",
    "data.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "39002021",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.000000</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.000000</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>107.201074</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   color      director_name  num_critic_for_reviews    duration  \\\n",
       "0  Color      James Cameron                   723.0  178.000000   \n",
       "1  Color     Gore Verbinski                   302.0  169.000000   \n",
       "2  Color         Sam Mendes                   602.0  148.000000   \n",
       "3  Color  Christopher Nolan                   813.0  164.000000   \n",
       "4    NaN        Doug Walker                     NaN  107.201074   \n",
       "\n",
       "   director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                      0.0                   855.0  Joel David Moore   \n",
       "1                    563.0                  1000.0     Orlando Bloom   \n",
       "2                      0.0                   161.0      Rory Kinnear   \n",
       "3                  22000.0                 23000.0    Christian Bale   \n",
       "4                    131.0                     NaN        Rob Walker   \n",
       "\n",
       "   actor_1_facebook_likes        gross                           genres  ...  \\\n",
       "0                  1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi  ...   \n",
       "1                 40000.0  309404152.0         Action|Adventure|Fantasy  ...   \n",
       "2                 11000.0  200074175.0        Action|Adventure|Thriller  ...   \n",
       "3                 27000.0  448130642.0                  Action|Thriller  ...   \n",
       "4                   131.0          NaN                      Documentary  ...   \n",
       "\n",
       "  num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0               3054.0  English      USA           PG-13  237000000.0   \n",
       "1               1238.0  English      USA           PG-13  300000000.0   \n",
       "2                994.0  English       UK           PG-13  245000000.0   \n",
       "3               2701.0  English      USA           PG-13  250000000.0   \n",
       "4                  NaN      NaN                      NaN          NaN   \n",
       "\n",
       "   title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0      2009.0                  936.0        7.9          1.78   \n",
       "1      2007.0                 5000.0        7.1          2.35   \n",
       "2      2015.0                  393.0        6.8          2.35   \n",
       "3      2012.0                23000.0        8.5          2.35   \n",
       "4         NaN                   12.0        7.1           NaN   \n",
       "\n",
       "  movie_facebook_likes  \n",
       "0                33000  \n",
       "1                    0  \n",
       "2                85000  \n",
       "3               164000  \n",
       "4                    0  \n",
       "\n",
       "[5 rows x 28 columns]"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "df6fc443",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.000000</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.000000</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>107.201074</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Color</td>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>1.0</td>\n",
       "      <td>87.000000</td>\n",
       "      <td>2.0</td>\n",
       "      <td>318.0</td>\n",
       "      <td>Daphne Zuniga</td>\n",
       "      <td>637.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>English</td>\n",
       "      <td>Canada</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>470.0</td>\n",
       "      <td>7.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>Color</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>43.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>319.0</td>\n",
       "      <td>Valorie Curry</td>\n",
       "      <td>841.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Crime|Drama|Mystery|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>359.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>TV-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>593.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>16.00</td>\n",
       "      <td>32000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Color</td>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>13.0</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Maxwell Moody</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Drama|Horror|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Color</td>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>14.0</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>489.0</td>\n",
       "      <td>Daniel Henney</td>\n",
       "      <td>946.0</td>\n",
       "      <td>10443.0</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>2.35</td>\n",
       "      <td>660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.000000</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5043 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews    duration  \\\n",
       "0     Color      James Cameron                   723.0  178.000000   \n",
       "1     Color     Gore Verbinski                   302.0  169.000000   \n",
       "2     Color         Sam Mendes                   602.0  148.000000   \n",
       "3     Color  Christopher Nolan                   813.0  164.000000   \n",
       "4       NaN        Doug Walker                     NaN  107.201074   \n",
       "...     ...                ...                     ...         ...   \n",
       "5038  Color        Scott Smith                     1.0   87.000000   \n",
       "5039  Color                NaN                    43.0   43.000000   \n",
       "5040  Color   Benjamin Roberds                    13.0   76.000000   \n",
       "5041  Color        Daniel Hsia                    14.0  100.000000   \n",
       "5042  Color           Jon Gunn                    43.0   90.000000   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                         0.0                   855.0  Joel David Moore   \n",
       "1                       563.0                  1000.0     Orlando Bloom   \n",
       "2                         0.0                   161.0      Rory Kinnear   \n",
       "3                     22000.0                 23000.0    Christian Bale   \n",
       "4                       131.0                     NaN        Rob Walker   \n",
       "...                       ...                     ...               ...   \n",
       "5038                      2.0                   318.0     Daphne Zuniga   \n",
       "5039                      NaN                   319.0     Valorie Curry   \n",
       "5040                      0.0                     0.0     Maxwell Moody   \n",
       "5041                      0.0                   489.0     Daniel Henney   \n",
       "5042                     16.0                    16.0  Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross                           genres  \\\n",
       "0                     1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi   \n",
       "1                    40000.0  309404152.0         Action|Adventure|Fantasy   \n",
       "2                    11000.0  200074175.0        Action|Adventure|Thriller   \n",
       "3                    27000.0  448130642.0                  Action|Thriller   \n",
       "4                      131.0          NaN                      Documentary   \n",
       "...                      ...          ...                              ...   \n",
       "5038                   637.0          NaN                     Comedy|Drama   \n",
       "5039                   841.0          NaN     Crime|Drama|Mystery|Thriller   \n",
       "5040                     0.0          NaN            Drama|Horror|Thriller   \n",
       "5041                   946.0      10443.0             Comedy|Drama|Romance   \n",
       "5042                    86.0      85222.0                      Documentary   \n",
       "\n",
       "      ... num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0     ...               3054.0  English      USA           PG-13  237000000.0   \n",
       "1     ...               1238.0  English      USA           PG-13  300000000.0   \n",
       "2     ...                994.0  English       UK           PG-13  245000000.0   \n",
       "3     ...               2701.0  English      USA           PG-13  250000000.0   \n",
       "4     ...                  NaN      NaN                      NaN          NaN   \n",
       "...   ...                  ...      ...      ...             ...          ...   \n",
       "5038  ...                  6.0  English   Canada             NaN          NaN   \n",
       "5039  ...                359.0  English      USA           TV-14          NaN   \n",
       "5040  ...                  3.0  English      USA             NaN       1400.0   \n",
       "5041  ...                  9.0  English      USA           PG-13          NaN   \n",
       "5042  ...                 84.0  English      USA              PG       1100.0   \n",
       "\n",
       "      title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0         2009.0                  936.0        7.9          1.78   \n",
       "1         2007.0                 5000.0        7.1          2.35   \n",
       "2         2015.0                  393.0        6.8          2.35   \n",
       "3         2012.0                23000.0        8.5          2.35   \n",
       "4            NaN                   12.0        7.1           NaN   \n",
       "...          ...                    ...        ...           ...   \n",
       "5038      2013.0                  470.0        7.7           NaN   \n",
       "5039         NaN                  593.0        7.5         16.00   \n",
       "5040      2013.0                    0.0        6.3           NaN   \n",
       "5041      2012.0                  719.0        6.3          2.35   \n",
       "5042      2004.0                   23.0        6.6          1.85   \n",
       "\n",
       "     movie_facebook_likes  \n",
       "0                   33000  \n",
       "1                       0  \n",
       "2                   85000  \n",
       "3                  164000  \n",
       "4                       0  \n",
       "...                   ...  \n",
       "5038                   84  \n",
       "5039                32000  \n",
       "5040                   16  \n",
       "5041                  660  \n",
       "5042                  456  \n",
       "\n",
       "[5043 rows x 28 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna(how=\"all\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "2be9f774",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.000000</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.000000</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>107.201074</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Color</td>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>1.0</td>\n",
       "      <td>87.000000</td>\n",
       "      <td>2.0</td>\n",
       "      <td>318.0</td>\n",
       "      <td>Daphne Zuniga</td>\n",
       "      <td>637.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>English</td>\n",
       "      <td>Canada</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>470.0</td>\n",
       "      <td>7.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>Color</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>43.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>319.0</td>\n",
       "      <td>Valorie Curry</td>\n",
       "      <td>841.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Crime|Drama|Mystery|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>359.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>TV-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>593.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>16.00</td>\n",
       "      <td>32000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Color</td>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>13.0</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Maxwell Moody</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Drama|Horror|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Color</td>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>14.0</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>489.0</td>\n",
       "      <td>Daniel Henney</td>\n",
       "      <td>946.0</td>\n",
       "      <td>10443.0</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>2.35</td>\n",
       "      <td>660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.000000</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5043 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews    duration  \\\n",
       "0     Color      James Cameron                   723.0  178.000000   \n",
       "1     Color     Gore Verbinski                   302.0  169.000000   \n",
       "2     Color         Sam Mendes                   602.0  148.000000   \n",
       "3     Color  Christopher Nolan                   813.0  164.000000   \n",
       "4       NaN        Doug Walker                     NaN  107.201074   \n",
       "...     ...                ...                     ...         ...   \n",
       "5038  Color        Scott Smith                     1.0   87.000000   \n",
       "5039  Color                NaN                    43.0   43.000000   \n",
       "5040  Color   Benjamin Roberds                    13.0   76.000000   \n",
       "5041  Color        Daniel Hsia                    14.0  100.000000   \n",
       "5042  Color           Jon Gunn                    43.0   90.000000   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                         0.0                   855.0  Joel David Moore   \n",
       "1                       563.0                  1000.0     Orlando Bloom   \n",
       "2                         0.0                   161.0      Rory Kinnear   \n",
       "3                     22000.0                 23000.0    Christian Bale   \n",
       "4                       131.0                     NaN        Rob Walker   \n",
       "...                       ...                     ...               ...   \n",
       "5038                      2.0                   318.0     Daphne Zuniga   \n",
       "5039                      NaN                   319.0     Valorie Curry   \n",
       "5040                      0.0                     0.0     Maxwell Moody   \n",
       "5041                      0.0                   489.0     Daniel Henney   \n",
       "5042                     16.0                    16.0  Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross                           genres  \\\n",
       "0                     1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi   \n",
       "1                    40000.0  309404152.0         Action|Adventure|Fantasy   \n",
       "2                    11000.0  200074175.0        Action|Adventure|Thriller   \n",
       "3                    27000.0  448130642.0                  Action|Thriller   \n",
       "4                      131.0          NaN                      Documentary   \n",
       "...                      ...          ...                              ...   \n",
       "5038                   637.0          NaN                     Comedy|Drama   \n",
       "5039                   841.0          NaN     Crime|Drama|Mystery|Thriller   \n",
       "5040                     0.0          NaN            Drama|Horror|Thriller   \n",
       "5041                   946.0      10443.0             Comedy|Drama|Romance   \n",
       "5042                    86.0      85222.0                      Documentary   \n",
       "\n",
       "      ... num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0     ...               3054.0  English      USA           PG-13  237000000.0   \n",
       "1     ...               1238.0  English      USA           PG-13  300000000.0   \n",
       "2     ...                994.0  English       UK           PG-13  245000000.0   \n",
       "3     ...               2701.0  English      USA           PG-13  250000000.0   \n",
       "4     ...                  NaN      NaN                      NaN          NaN   \n",
       "...   ...                  ...      ...      ...             ...          ...   \n",
       "5038  ...                  6.0  English   Canada             NaN          NaN   \n",
       "5039  ...                359.0  English      USA           TV-14          NaN   \n",
       "5040  ...                  3.0  English      USA             NaN       1400.0   \n",
       "5041  ...                  9.0  English      USA           PG-13          NaN   \n",
       "5042  ...                 84.0  English      USA              PG       1100.0   \n",
       "\n",
       "      title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0         2009.0                  936.0        7.9          1.78   \n",
       "1         2007.0                 5000.0        7.1          2.35   \n",
       "2         2015.0                  393.0        6.8          2.35   \n",
       "3         2012.0                23000.0        8.5          2.35   \n",
       "4            NaN                   12.0        7.1           NaN   \n",
       "...          ...                    ...        ...           ...   \n",
       "5038      2013.0                  470.0        7.7           NaN   \n",
       "5039         NaN                  593.0        7.5         16.00   \n",
       "5040      2013.0                    0.0        6.3           NaN   \n",
       "5041      2012.0                  719.0        6.3          2.35   \n",
       "5042      2004.0                   23.0        6.6          1.85   \n",
       "\n",
       "     movie_facebook_likes  \n",
       "0                   33000  \n",
       "1                       0  \n",
       "2                   85000  \n",
       "3                  164000  \n",
       "4                       0  \n",
       "...                   ...  \n",
       "5038                   84  \n",
       "5039                32000  \n",
       "5040                   16  \n",
       "5041                  660  \n",
       "5042                  456  \n",
       "\n",
       "[5043 rows x 28 columns]"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna(axis=1,how='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "a9d6b835",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Color</td>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>1.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>318.0</td>\n",
       "      <td>Daphne Zuniga</td>\n",
       "      <td>637.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>English</td>\n",
       "      <td>Canada</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>470.0</td>\n",
       "      <td>7.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>Color</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>319.0</td>\n",
       "      <td>Valorie Curry</td>\n",
       "      <td>841.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Crime|Drama|Mystery|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>359.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>TV-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>593.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>16.00</td>\n",
       "      <td>32000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Color</td>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>13.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Maxwell Moody</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Drama|Horror|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Color</td>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>14.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>489.0</td>\n",
       "      <td>Daniel Henney</td>\n",
       "      <td>946.0</td>\n",
       "      <td>10443.0</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>2.35</td>\n",
       "      <td>660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5036 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews  duration  \\\n",
       "0     Color      James Cameron                   723.0     178.0   \n",
       "1     Color     Gore Verbinski                   302.0     169.0   \n",
       "2     Color         Sam Mendes                   602.0     148.0   \n",
       "3     Color  Christopher Nolan                   813.0     164.0   \n",
       "5     Color     Andrew Stanton                   462.0     132.0   \n",
       "...     ...                ...                     ...       ...   \n",
       "5038  Color        Scott Smith                     1.0      87.0   \n",
       "5039  Color                NaN                    43.0      43.0   \n",
       "5040  Color   Benjamin Roberds                    13.0      76.0   \n",
       "5041  Color        Daniel Hsia                    14.0     100.0   \n",
       "5042  Color           Jon Gunn                    43.0      90.0   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                         0.0                   855.0  Joel David Moore   \n",
       "1                       563.0                  1000.0     Orlando Bloom   \n",
       "2                         0.0                   161.0      Rory Kinnear   \n",
       "3                     22000.0                 23000.0    Christian Bale   \n",
       "5                       475.0                   530.0   Samantha Morton   \n",
       "...                       ...                     ...               ...   \n",
       "5038                      2.0                   318.0     Daphne Zuniga   \n",
       "5039                      NaN                   319.0     Valorie Curry   \n",
       "5040                      0.0                     0.0     Maxwell Moody   \n",
       "5041                      0.0                   489.0     Daniel Henney   \n",
       "5042                     16.0                    16.0  Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross                           genres  \\\n",
       "0                     1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi   \n",
       "1                    40000.0  309404152.0         Action|Adventure|Fantasy   \n",
       "2                    11000.0  200074175.0        Action|Adventure|Thriller   \n",
       "3                    27000.0  448130642.0                  Action|Thriller   \n",
       "5                      640.0   73058679.0          Action|Adventure|Sci-Fi   \n",
       "...                      ...          ...                              ...   \n",
       "5038                   637.0          NaN                     Comedy|Drama   \n",
       "5039                   841.0          NaN     Crime|Drama|Mystery|Thriller   \n",
       "5040                     0.0          NaN            Drama|Horror|Thriller   \n",
       "5041                   946.0      10443.0             Comedy|Drama|Romance   \n",
       "5042                    86.0      85222.0                      Documentary   \n",
       "\n",
       "      ... num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0     ...               3054.0  English      USA           PG-13  237000000.0   \n",
       "1     ...               1238.0  English      USA           PG-13  300000000.0   \n",
       "2     ...                994.0  English       UK           PG-13  245000000.0   \n",
       "3     ...               2701.0  English      USA           PG-13  250000000.0   \n",
       "5     ...                738.0  English      USA           PG-13  263700000.0   \n",
       "...   ...                  ...      ...      ...             ...          ...   \n",
       "5038  ...                  6.0  English   Canada             NaN          NaN   \n",
       "5039  ...                359.0  English      USA           TV-14          NaN   \n",
       "5040  ...                  3.0  English      USA             NaN       1400.0   \n",
       "5041  ...                  9.0  English      USA           PG-13          NaN   \n",
       "5042  ...                 84.0  English      USA              PG       1100.0   \n",
       "\n",
       "      title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0         2009.0                  936.0        7.9          1.78   \n",
       "1         2007.0                 5000.0        7.1          2.35   \n",
       "2         2015.0                  393.0        6.8          2.35   \n",
       "3         2012.0                23000.0        8.5          2.35   \n",
       "5         2012.0                  632.0        6.6          2.35   \n",
       "...          ...                    ...        ...           ...   \n",
       "5038      2013.0                  470.0        7.7           NaN   \n",
       "5039         NaN                  593.0        7.5         16.00   \n",
       "5040      2013.0                    0.0        6.3           NaN   \n",
       "5041      2012.0                  719.0        6.3          2.35   \n",
       "5042      2004.0                   23.0        6.6          1.85   \n",
       "\n",
       "     movie_facebook_likes  \n",
       "0                   33000  \n",
       "1                       0  \n",
       "2                   85000  \n",
       "3                  164000  \n",
       "5                   24000  \n",
       "...                   ...  \n",
       "5038                   84  \n",
       "5039                32000  \n",
       "5040                   16  \n",
       "5041                  660  \n",
       "5042                  456  \n",
       "\n",
       "[5036 rows x 28 columns]"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# thresh 表示 至少有多少非空的值的数据会被保留\n",
    "data.dropna(thresh=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "247b4b77",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False    4935\n",
       "True      108\n",
       "Name: title_year, dtype: int64"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['title_year'].isnull().value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "e92a2561",
   "metadata": {},
   "outputs": [],
   "source": [
    "new_data = data.dropna(subset=['title_year'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "e4d2ac8e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False    4935\n",
       "Name: title_year, dtype: int64"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_data['title_year'].isnull().value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "2769624a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5043 entries, 0 to 5042\n",
      "Data columns (total 28 columns):\n",
      " #   Column                     Non-Null Count  Dtype  \n",
      "---  ------                     --------------  -----  \n",
      " 0   color                      5024 non-null   object \n",
      " 1   director_name              4939 non-null   object \n",
      " 2   num_critic_for_reviews     4993 non-null   float64\n",
      " 3   duration                   5043 non-null   float64\n",
      " 4   director_facebook_likes    4939 non-null   float64\n",
      " 5   actor_3_facebook_likes     5020 non-null   float64\n",
      " 6   actor_2_name               5030 non-null   object \n",
      " 7   actor_1_facebook_likes     5036 non-null   float64\n",
      " 8   gross                      4159 non-null   float64\n",
      " 9   genres                     5043 non-null   object \n",
      " 10  actor_1_name               5036 non-null   object \n",
      " 11  movie_title                5043 non-null   object \n",
      " 12  num_voted_users            5043 non-null   int64  \n",
      " 13  cast_total_facebook_likes  5043 non-null   int64  \n",
      " 14  actor_3_name               5020 non-null   object \n",
      " 15  facenumber_in_poster       5030 non-null   float64\n",
      " 16  plot_keywords              4890 non-null   object \n",
      " 17  movie_imdb_link            5043 non-null   object \n",
      " 18  num_user_for_reviews       5022 non-null   float64\n",
      " 19  language                   5031 non-null   object \n",
      " 20  country                    5043 non-null   object \n",
      " 21  content_rating             4740 non-null   object \n",
      " 22  budget                     4551 non-null   float64\n",
      " 23  title_year                 4935 non-null   float64\n",
      " 24  actor_2_facebook_likes     5030 non-null   float64\n",
      " 25  imdb_score                 5043 non-null   float64\n",
      " 26  aspect_ratio               4714 non-null   float64\n",
      " 27  movie_facebook_likes       5043 non-null   int64  \n",
      "dtypes: float64(13), int64(3), object(12)\n",
      "memory usage: 1.1+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "4e462e20",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('./data/movie_metadata.csv', dtype={'title_year':str})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "85aeb2a2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5043 entries, 0 to 5042\n",
      "Data columns (total 28 columns):\n",
      " #   Column                     Non-Null Count  Dtype  \n",
      "---  ------                     --------------  -----  \n",
      " 0   color                      5024 non-null   object \n",
      " 1   director_name              4939 non-null   object \n",
      " 2   num_critic_for_reviews     4993 non-null   float64\n",
      " 3   duration                   5028 non-null   float64\n",
      " 4   director_facebook_likes    4939 non-null   float64\n",
      " 5   actor_3_facebook_likes     5020 non-null   float64\n",
      " 6   actor_2_name               5030 non-null   object \n",
      " 7   actor_1_facebook_likes     5036 non-null   float64\n",
      " 8   gross                      4159 non-null   float64\n",
      " 9   genres                     5043 non-null   object \n",
      " 10  actor_1_name               5036 non-null   object \n",
      " 11  movie_title                5043 non-null   object \n",
      " 12  num_voted_users            5043 non-null   int64  \n",
      " 13  cast_total_facebook_likes  5043 non-null   int64  \n",
      " 14  actor_3_name               5020 non-null   object \n",
      " 15  facenumber_in_poster       5030 non-null   float64\n",
      " 16  plot_keywords              4890 non-null   object \n",
      " 17  movie_imdb_link            5043 non-null   object \n",
      " 18  num_user_for_reviews       5022 non-null   float64\n",
      " 19  language                   5031 non-null   object \n",
      " 20  country                    5038 non-null   object \n",
      " 21  content_rating             4740 non-null   object \n",
      " 22  budget                     4551 non-null   float64\n",
      " 23  title_year                 4935 non-null   object \n",
      " 24  actor_2_facebook_likes     5030 non-null   float64\n",
      " 25  imdb_score                 5043 non-null   float64\n",
      " 26  aspect_ratio               4714 non-null   float64\n",
      " 27  movie_facebook_likes       5043 non-null   int64  \n",
      "dtypes: float64(12), int64(3), object(13)\n",
      "memory usage: 1.1+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "eb49d9c4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0                                                 avatar \n",
       "1               pirates of the caribbean: at world's end \n",
       "2                                                spectre \n",
       "3                                  the dark knight rises \n",
       "4       star wars: episode vii - the force awakens    ...\n",
       "                              ...                        \n",
       "5038                             signed sealed delivered \n",
       "5039                           the following             \n",
       "5040                                a plague so pleasant \n",
       "5041                                    shanghai calling \n",
       "5042                                   my date with drew \n",
       "Name: movie_title, Length: 5043, dtype: object"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['movie_title'].str.upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "2044f5ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_csv(\"data/newMovieData.csv\",encoding=\"utf8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce174916",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
